{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from requests_html import AsyncHTMLSession, HTMLSession\n",
    "REQUEST_TIMEOUT = 10  # seconds\n",
    "\n",
    "session = HTMLSession()\n",
    "url = 'https://zz.lianjia.com/zufang/'\n",
    "# Bound the wait so an unresponsive server cannot hang the kernel, and stop\n",
    "# on an HTTP error status rather than parsing an error page in later cells.\n",
    "resp = session.get(url, timeout=REQUEST_TIMEOUT)\n",
    "resp.raise_for_status()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 先渲染一下，然后再提取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "ename": "RuntimeError",
     "evalue": "Cannot use HTMLSession within an existing event loop. Use AsyncHTMLSession instead.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-4-6512cc6b601d>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mresp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrender\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[0mhrefs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mresp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mxpath\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'.//div[@class=\"content__pg\"]/a/@href'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mhrefs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\python\\Anaconda3\\lib\\site-packages\\requests_html.py\u001b[0m in \u001b[0;36mrender\u001b[1;34m(self, retries, script, wait, scrolldown, sleep, reload, timeout, keep_page)\u001b[0m\n\u001b[0;32m    584\u001b[0m         \"\"\"\n\u001b[0;32m    585\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 586\u001b[1;33m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbrowser\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbrowser\u001b[0m  \u001b[1;31m# Automatically create a event loop and browser\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    587\u001b[0m         \u001b[0mcontent\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    588\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\python\\Anaconda3\\lib\\site-packages\\requests_html.py\u001b[0m in \u001b[0;36mbrowser\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m    727\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloop\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0masyncio\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_event_loop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    728\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloop\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mis_running\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 729\u001b[1;33m                 \u001b[1;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Cannot use HTMLSession within an existing event loop. Use AsyncHTMLSession instead.\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    730\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_browser\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloop\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun_until_complete\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbrowser\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    731\u001b[0m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_browser\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mRuntimeError\u001b[0m: Cannot use HTMLSession within an existing event loop. Use AsyncHTMLSession instead."
     ]
    }
   ],
   "source": [
    "## 执行js代码，将网页上异步渲染的内容，结合到html中去，再把结果返回给resp对象\n",
    "resp.html.render()\n",
    "hrefs = resp.html.xpath('.//div[@class=\"content__pg\"]/a/@href')\n",
    "print(hrefs)\n",
    "\n",
    "## 输出：['javascript:;', '/zufang/#contentList', '/zufang/pg2/#contentList', '/zufang/pg3/#contentList', '/zufang/pg100/#contentList', '/zufang/pg2/#contentList']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 思路1：直接解析 HTML（无需渲染）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['1']\n",
      "['100']\n"
     ]
    }
   ],
   "source": [
    "# Read the pagination bounds from the data-* attributes of the pager <div>.\n",
    "# xpath() returns a list of matches, hence the one-element lists printed below.\n",
    "pager_xpath = './/div[@class=\"content__pg\"]'\n",
    "start = resp.html.xpath(pager_xpath + '/@data-curpage')\n",
    "end = resp.html.xpath(pager_xpath + '/@data-totalpage')\n",
    "print(start, end, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "https://zz.lianjia.com/zufang/pg1/#contentList\n",
      "https://zz.lianjia.com/zufang/pg2/#contentList\n",
      "https://zz.lianjia.com/zufang/pg3/#contentList\n",
      "https://zz.lianjia.com/zufang/pg4/#contentList\n",
      "https://zz.lianjia.com/zufang/pg5/#contentList\n",
      "https://zz.lianjia.com/zufang/pg6/#contentList\n",
      "https://zz.lianjia.com/zufang/pg7/#contentList\n",
      "https://zz.lianjia.com/zufang/pg8/#contentList\n",
      "https://zz.lianjia.com/zufang/pg9/#contentList\n",
      "https://zz.lianjia.com/zufang/pg10/#contentList\n",
      "https://zz.lianjia.com/zufang/pg11/#contentList\n",
      "https://zz.lianjia.com/zufang/pg12/#contentList\n",
      "https://zz.lianjia.com/zufang/pg13/#contentList\n",
      "https://zz.lianjia.com/zufang/pg14/#contentList\n",
      "https://zz.lianjia.com/zufang/pg15/#contentList\n",
      "https://zz.lianjia.com/zufang/pg16/#contentList\n",
      "https://zz.lianjia.com/zufang/pg17/#contentList\n",
      "https://zz.lianjia.com/zufang/pg18/#contentList\n",
      "https://zz.lianjia.com/zufang/pg19/#contentList\n",
      "https://zz.lianjia.com/zufang/pg20/#contentList\n",
      "https://zz.lianjia.com/zufang/pg21/#contentList\n",
      "https://zz.lianjia.com/zufang/pg22/#contentList\n",
      "https://zz.lianjia.com/zufang/pg23/#contentList\n",
      "https://zz.lianjia.com/zufang/pg24/#contentList\n",
      "https://zz.lianjia.com/zufang/pg25/#contentList\n",
      "https://zz.lianjia.com/zufang/pg26/#contentList\n",
      "https://zz.lianjia.com/zufang/pg27/#contentList\n",
      "https://zz.lianjia.com/zufang/pg28/#contentList\n",
      "https://zz.lianjia.com/zufang/pg29/#contentList\n",
      "https://zz.lianjia.com/zufang/pg30/#contentList\n",
      "https://zz.lianjia.com/zufang/pg31/#contentList\n",
      "https://zz.lianjia.com/zufang/pg32/#contentList\n",
      "https://zz.lianjia.com/zufang/pg33/#contentList\n",
      "https://zz.lianjia.com/zufang/pg34/#contentList\n",
      "https://zz.lianjia.com/zufang/pg35/#contentList\n",
      "https://zz.lianjia.com/zufang/pg36/#contentList\n",
      "https://zz.lianjia.com/zufang/pg37/#contentList\n",
      "https://zz.lianjia.com/zufang/pg38/#contentList\n",
      "https://zz.lianjia.com/zufang/pg39/#contentList\n",
      "https://zz.lianjia.com/zufang/pg40/#contentList\n",
      "https://zz.lianjia.com/zufang/pg41/#contentList\n",
      "https://zz.lianjia.com/zufang/pg42/#contentList\n",
      "https://zz.lianjia.com/zufang/pg43/#contentList\n",
      "https://zz.lianjia.com/zufang/pg44/#contentList\n",
      "https://zz.lianjia.com/zufang/pg45/#contentList\n",
      "https://zz.lianjia.com/zufang/pg46/#contentList\n",
      "https://zz.lianjia.com/zufang/pg47/#contentList\n",
      "https://zz.lianjia.com/zufang/pg48/#contentList\n",
      "https://zz.lianjia.com/zufang/pg49/#contentList\n",
      "https://zz.lianjia.com/zufang/pg50/#contentList\n",
      "https://zz.lianjia.com/zufang/pg51/#contentList\n",
      "https://zz.lianjia.com/zufang/pg52/#contentList\n",
      "https://zz.lianjia.com/zufang/pg53/#contentList\n",
      "https://zz.lianjia.com/zufang/pg54/#contentList\n",
      "https://zz.lianjia.com/zufang/pg55/#contentList\n",
      "https://zz.lianjia.com/zufang/pg56/#contentList\n",
      "https://zz.lianjia.com/zufang/pg57/#contentList\n",
      "https://zz.lianjia.com/zufang/pg58/#contentList\n",
      "https://zz.lianjia.com/zufang/pg59/#contentList\n",
      "https://zz.lianjia.com/zufang/pg60/#contentList\n",
      "https://zz.lianjia.com/zufang/pg61/#contentList\n",
      "https://zz.lianjia.com/zufang/pg62/#contentList\n",
      "https://zz.lianjia.com/zufang/pg63/#contentList\n",
      "https://zz.lianjia.com/zufang/pg64/#contentList\n",
      "https://zz.lianjia.com/zufang/pg65/#contentList\n",
      "https://zz.lianjia.com/zufang/pg66/#contentList\n",
      "https://zz.lianjia.com/zufang/pg67/#contentList\n",
      "https://zz.lianjia.com/zufang/pg68/#contentList\n",
      "https://zz.lianjia.com/zufang/pg69/#contentList\n",
      "https://zz.lianjia.com/zufang/pg70/#contentList\n",
      "https://zz.lianjia.com/zufang/pg71/#contentList\n",
      "https://zz.lianjia.com/zufang/pg72/#contentList\n",
      "https://zz.lianjia.com/zufang/pg73/#contentList\n",
      "https://zz.lianjia.com/zufang/pg74/#contentList\n",
      "https://zz.lianjia.com/zufang/pg75/#contentList\n",
      "https://zz.lianjia.com/zufang/pg76/#contentList\n",
      "https://zz.lianjia.com/zufang/pg77/#contentList\n",
      "https://zz.lianjia.com/zufang/pg78/#contentList\n",
      "https://zz.lianjia.com/zufang/pg79/#contentList\n",
      "https://zz.lianjia.com/zufang/pg80/#contentList\n",
      "https://zz.lianjia.com/zufang/pg81/#contentList\n",
      "https://zz.lianjia.com/zufang/pg82/#contentList\n",
      "https://zz.lianjia.com/zufang/pg83/#contentList\n",
      "https://zz.lianjia.com/zufang/pg84/#contentList\n",
      "https://zz.lianjia.com/zufang/pg85/#contentList\n",
      "https://zz.lianjia.com/zufang/pg86/#contentList\n",
      "https://zz.lianjia.com/zufang/pg87/#contentList\n",
      "https://zz.lianjia.com/zufang/pg88/#contentList\n",
      "https://zz.lianjia.com/zufang/pg89/#contentList\n",
      "https://zz.lianjia.com/zufang/pg90/#contentList\n",
      "https://zz.lianjia.com/zufang/pg91/#contentList\n",
      "https://zz.lianjia.com/zufang/pg92/#contentList\n",
      "https://zz.lianjia.com/zufang/pg93/#contentList\n",
      "https://zz.lianjia.com/zufang/pg94/#contentList\n",
      "https://zz.lianjia.com/zufang/pg95/#contentList\n",
      "https://zz.lianjia.com/zufang/pg96/#contentList\n",
      "https://zz.lianjia.com/zufang/pg97/#contentList\n",
      "https://zz.lianjia.com/zufang/pg98/#contentList\n",
      "https://zz.lianjia.com/zufang/pg99/#contentList\n",
      "https://zz.lianjia.com/zufang/pg100/#contentList\n"
     ]
    }
   ],
   "source": [
    "# Expand the URL template for every page between the current and the last page\n",
    "# (both inclusive), then list the resulting URLs one per line.\n",
    "base_url = 'https://zz.lianjia.com/zufang/pg{}/#contentList'\n",
    "first_page = int(start[0])\n",
    "last_page = int(end[0])\n",
    "page_urls = [base_url.format(page) for page in range(first_page, last_page + 1)]\n",
    "for page_url in page_urls:\n",
    "    print(page_url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
